* Session setup for the whole script: turn off output paging so the run is
* not interrupted, and set matsize to 11000 before any dataset is processed.
set more off
set matsize 11000
foreach dataset in   cps brfss cex  {

	* Start every dataset pass from an empty workspace.
	clear
	* Root folder for the current dataset (its raw and data subfolders hang
	* off this path). The loop macro is expanded right here, without a
	* backslash escape, so the global stores the resolved path instead of a
	* deferred macro reference that would depend on the local still existing
	* wherever the global is used.
	global mypath "//rschfs1x/userRS/A-E/dbr88_RS/Documents/Selection/`dataset'"
	* Switch: set to "on" to write the cross-tab log further down.
	global tables "off"

****Merge and combine raw datasets
	if "`dataset'"=="brfss" {
		* Read the transport file; the clear option on fdause already
		* replaces whatever is in memory, so no separate clear is needed.
		fdause "${mypath}/raw/LLCP2012.XPT", clear
	}
	
	else if "`dataset'"=="cex" {
		clear
		foreach year in 08 09 10 11 12 13 {
			* Two-digit tag of the year before the current one
			* (08 -> 07, 09 -> 08, ..., 13 -> 12), used to build raw file names.
			local pyear = string(`year' - 1, "%02.0f")

			***Create paradata set*****
			* Builds one row per newid with the highest contact counter, the
			* interview indicator and the hour group of the last contact, and
			* saves it as cex_contactattempts_<year>.
			* Year 08 reads the fixed file mchi0809; all other years read
			* mchi<previous year><year>.
			if "`year'"=="08" {
				use "${mypath}/raw/intrvw`year'/mchi0809.dta", clear
			}
			else {
				use "${mypath}/raw/intrvw`year'/mchi`pyear'`year'.dta", clear
			}

			* Contact counter = characters 2-3 of cntckey, converted to numeric.
			tostring cntckey, replace
			generate contacts_old = substr(cntckey, 2, 2)
			* Full variable name (the original used the abbreviation "contacts"),
			* so the conversion does not depend on abbreviation matching.
			destring contacts_old, replace

			* Highest counter within newid, and the visit hour on that record.
			bysort newid: egen contacts_all = max(contacts_old)
			bysort newid: generate last_hour_all = visit_hr if contacts_old==contacts_all
			label variable last_hour_all "Visit hour of the last contact"

			* Bin the last-contact hour, then spread the bin to every row of the newid.
			recode last_hour_all (0/12=1) (13/14=2) (15/16=3) (17/23=4) (miss=.), generate(hour_group2_all)
			label define hour_group 1 "Hour 0-12" 2 "Hour 13-14" 3 "Hour 15-16" 4 "Hour 17-23" 5 "Missing hour"
			label values hour_group2_all hour_group
			label variable hour_group2_all "Hour of visit for last contact attempt"
			bysort newid: egen hour_group_all = max(hour_group2_all)

			* Numeric copy of interi.
			generate interview = interi
			destring interview, replace

			* Debug hook: only stops when pause has been switched on.
			pause

			* newid was listed twice in the original keep; once is enough.
			keep contacts_all newid interview hour_group_all

			bysort newid: keep if _n==1
			save "${mypath}/data/cex_contactattempts_`year'.dta", replace
			
			******First attempts
			* Builds one row per cuid from the records with interi equal to "1":
			* number of contacts, first and last visit hour, and the hour group of
			* the last contact. Saved as cex_firstcontactattempts_<year>.
			clear
			if "`year'"=="08" {
				use "${mypath}/raw/intrvw`year'/mchi0809.dta", clear
			}
			else {
				use "${mypath}/raw/intrvw`year'/mchi`pyear'`year'.dta", clear
			}
			keep if interi=="1"

			* Contact counter = characters 2-3 of cntckey as a number.
			tostring cntckey, replace
			generate contacts_old = real(substr(cntckey, 2, 2))
			bysort cuid: egen contacts_firstinterview = max(contacts_old)

			* NOTE(review): first_hour and last_hour are filled only on the matching
			* contact row and are not spread within cuid, so the single row kept
			* below can carry missing values for them. Confirm whether that is intended.
			bysort cuid: generate first_hour = visit_hr if contacts_old==1
			label variable first_hour "Visit hour of the first contact"
			bysort cuid: generate last_hour = visit_hr if contacts_old==contacts_firstinterview
			label variable last_hour "Visit hour of the last contact"

			* Bin the last-contact hour and spread the bin to every row of the cuid.
			recode last_hour (0/12=1) (13/14=2) (15/16=3) (17/23=4) (miss=.), generate(hour_group2)
			bysort cuid: egen hour_group = max(hour_group2)

			* Label the variable that is actually kept. The original issued these
			* label commands before hour_group existed, so they resolved by
			* abbreviation to hour_group2, which is dropped by the keep below.
			label define hour_group 1 "Hour 0-12" 2 "Hour 13-14" 3 "Hour 15-16" 4 "Hour 17-23" 5 "Missing hour"
			label values hour_group hour_group
			label variable hour_group "Hour of visit for last contact attempt"

			keep contacts_firstinterview cuid first_hour last_hour hour_group

			bysort cuid: keep if _n==1
			save "${mypath}/data/cex_firstcontactattempts_`year'.dta", replace
			
			
			
			
			***Create a contact method dataset
			* Derives the interview mode and interview length (raw and binned) per
			* newid from the fpar file and saves them as cex_mode_<year>.
			clear
			if "`year'"=="08" {
				use "${mypath}/raw/intrvw`year'/fpar0809.dta", clear
			}
			else {
				use "${mypath}/raw/intrvw`year'/fpar`pyear'`year'.dta", clear
			}

			destring telpv how_intv, replace

			* Mode defaults to 4 (Unknown). The replaces run in this order on
			* purpose: a later rule overrides an earlier one on the same row.
			generate mode = 4
			replace mode = 1 if how_intv==1 | telpv==1
			replace mode = 2 if how_intv==6 | telpv==2
			replace mode = 3 if how_intv>=2 & how_intv<=5

			label define mode 1 "In person" 2 "Telephone" 3 "Mixed" 4 "Unknown"
			label values mode mode

			* tot_time divided by 60 gives the length in minutes (per the label).
			generate interview_length = tot_time/60
			label variable interview_length "Length of interview in minutes"

			* Four length bins; negative or missing lengths stay missing.
			generate interview_length_group = 0 if interview_length>=0 & interview_length<41.05
			replace interview_length_group = 1 if interview_length>=41.05 & interview_length<57.85
			replace interview_length_group = 2 if interview_length>=57.85 & interview_length<80.15
			replace interview_length_group = 3 if interview_length>=80.15 & interview_length!=.
			replace interview_length_group = . if interview_length==.
			* Bin 3 starts at 80.15 minutes; the original label text said "57.85+".
			label define interview_length 0 "0-41.05 mins" 1 " 41.05 -57.85" 2 "57.85-80.15" 3 "80.15+" 4 "Missing"
			label values interview_length_group interview_length

			keep mode newid interview_length interview_length_group
			save "${mypath}/data/cex_mode_`year'.dta", replace

			
			****Creating expenditure dataset 
			* Stack the four quarterly fmli files of the year. The two tag
			* variables are created on the empty dataset first, so after each
			* append only the freshly added rows are still untagged.
			clear
			generate year_new = .
			generate quarter_new = ""
			foreach qtr in 1x 2 3 4 {
				append using "${mypath}/raw/intrvw`year'/fmli`year'`qtr'.dta"
				replace year_new = `year' if year_new==.
				replace quarter_new = "`qtr'" if quarter_new==""
			}
			save "${mypath}/data/fmli_combined_`year'.dta", replace

			* Attach contact attempts: keep master rows whether or not they
			* matched, and drop rows that exist only in the contact-attempt file.
			merge 1:1 newid using "${mypath}/data/cex_contactattempts_`year'.dta"
			keep if inlist(_merge, 1, 3)

			* Attach interview mode: only matched rows survive.
			merge 1:1 newid using "${mypath}/data/cex_mode_`year'.dta", generate(mode_merge)
			keep if mode_merge==3
			*pause 

			*merge m:1 cuid using "${mypath}/data/cex_firstcontactattempts_`year'.dta", generate(firstattempts_merge)
			save "${mypath}/data/cex_temp_`year'.dta", replace
			
		}
		* Pool the yearly first-attempt files and keep a single row per cuid.
		clear
		foreach yr in 08 09 10 11 12 13 {
			append using "${mypath}/data/cex_firstcontactattempts_`yr'.dta"
		}
		bysort cuid: keep if _n==1
		save "${mypath}/data/cex_firstattempts_allyears.dta",replace

		*Combining across years
		* Each yearly temp file is appended and then removed from disk together
		* with that year's contact-attempt and combined-fmli intermediates.
		clear
		foreach yr in 08 09 10 11 12 13 {
			append using "${mypath}/data/cex_temp_`yr'.dta"
			foreach stem in cex_temp cex_contactattempts fmli_combined {
				erase "${mypath}/data/`stem'_`yr'.dta"
			}
		}

		* Bring in the pooled first-attempt measures by cuid.
		merge m:1 cuid using "${mypath}/data/cex_firstattempts_allyears.dta", generate(firstattempts_merge)
		* Debug hook: only stops when pause has been switched on.
		pause
		
	}
	
	else if "`dataset'"=="cps" {
		* Stack the monthly files cpsb<year>_<month> for 2012 and 2013,
		* months 1 through 12 within each year.
		clear
		forvalues yr = 2012/2013 {
			forvalues mo = 1/12 {
				append using "${mypath}/raw/cpsb`yr'_`mo'.dta"
			}
		}
	}
	
	***Save a merged version of each dataset to use below for quicker cleaning
	* The file name is built once and used for both the save and the reload.
	local precleaned_file "${mypath}/data/`dataset'_precleaned.dta"
	save "`precleaned_file'", replace

	******Variable cleaning
	* Cleaning always starts from the file that was just written.
	use "`precleaned_file'", clear
		*****Difficulty to reach

		if "`dataset'"=="brfss" {
			* Bin the attempt count; any other non-missing count goes to bin 4.
			recode nattmpts (1=1) (2/3=2) (4/6=3) (nonmiss=4), generate(difficulty)
			label define difficulty 1 "1 call" 2 "2-3 calls" 3 "4-6 calls" 4 "7 or more calls"
			label values difficulty difficulty

			* Unbinned copy of the attempt count.
			generate attempts_cleaned = nattmpts
		}
		else if "`dataset'"=="cex" {
			* contacts_firstinterview was generated from the paradata above.
			recode contacts_firstinterview (1=1) (2=2) (3/4=3) (nonmiss=4), generate(difficulty)
			label define difficulty 1 "1 attempt" 2 "2 attempts" 3 "3-4 attempts" 4 "5 or more attempts"
			label values difficulty difficulty
			label variable difficulty "Difficulty to reach at first interview"

			* Unbinned copy; a count of zero is left missing.
			generate attempts_cleaned = contacts_firstinterview if contacts_firstinterview!=0

			* Same bins applied to the contact count of each interview.
			recode contacts_all (1=1) (2=2) (3/4=3) (nonmiss=4), generate(difficulty_all)
			label values difficulty_all difficulty
			label variable difficulty_all "Difficulty to reach at that interview"
		}
		else if "`dataset'"=="cps" {
			* hupr bins; a value of 0 is coded as 4 (Unknown).
			recode hupr (1=1) (2=2) (3/9=3) (0=4), generate(difficulty)
			label define difficulty 1 "1 visit" 2 "2 visits" 3 "3 or more visits" 4 "Unknown"
			label values difficulty difficulty

			* Unbinned copy; a value of zero is left missing.
			generate attempts_cleaned = hupr if hupr!=0

			* Interview mode from huinttyp; -1 is coded as 3 (Unknown).
			recode huinttyp (1=1) (2=2) (-1=3), generate(mode)
			label define mode 1 "In person" 2 "Telephone" 3 "Unknown"
			label values mode mode
		}

		
		******Dependent variables
		
		if "`dataset'"=="brfss" {
			* ---- Height ----
			* Work on a 4-character string: 3-character values get a leading zero,
			* and the codes ".", "7777" and "9999" are blanked out.
			tostring height3, replace
			replace height3 = "0" + height3 if length(height3)==3
			replace height3 = "" if inlist(height3, ".", "7777", "9999")
			* Values not starting with 7 or 9: first two characters are feet,
			* last two are inches; both converted to meters.
			generate sr_height = real(substr(height3,1,2))*12*0.0254 + real(substr(height3,3,2))*0.0254 if !inlist(substr(height3,1,1), "7", "9")
			* Values starting with 9: characters 2-4 divided by 100 give meters.
			replace sr_height = real(substr(height3,2,3))/100 if substr(height3,1,1)=="9"
			* Anything above 5 is set to missing.
			replace sr_height = . if sr_height>5
			label variable sr_height "Self-reported height in meters"

			* ---- Weight ----
			* Pad to 4 characters, then blank out the same three codes.
			tostring weight2, replace
			replace weight2 = "0" + weight2 if length(weight2)==3
			replace weight2 = "00" + weight2 if length(weight2)==2
			replace weight2 = "" if inlist(weight2, ".", "7777", "9999")
			* Values not starting with 7 or 9 are pounds, converted to kilograms.
			generate sr_weight = real(substr(weight2,2,3))/2.20462 if !inlist(substr(weight2,1,1), "7", "9")
			* Values starting with 9: characters 2-4 are used as they are.
			replace sr_weight = real(substr(weight2,2,3)) if substr(weight2,1,1)=="9"
			label variable sr_weight "Self reported weight in kilograms"

			* ---- BMI and obesity ----
			generate sr_bmi = _bmi5/100
			label variable sr_bmi "BMI based on self-reported height and weight"

			* Indicator for _bmi5cat equal to 4, missing where _bmi5cat is missing.
			generate sr_obese = (_bmi5cat==4) if _bmi5cat!=.
			label variable sr_obese "Obesity status based on self-reported BMI"

			* Second indicator built the same way under a different name.
			generate brfss_obese = (_bmi5cat==4) if _bmi5cat!=.
		}
		else if "`dataset'"=="cex" {
			* Each outcome is the sum of its pq and cq components, followed by
			* a log(1 + x) version.
			generate total = totexppq + totexpcq
			generate ln_total = ln(1+total)
			label variable total "Total expenditures (last 3 months)"
			label variable ln_total "Log total expenditures (last 3 months)"

			generate health = healthpq + healthcq
			generate ln_health = ln(1+health)
			label variable health "Health expenditures (last 3 months), excludes non-prescription drugs"
			label variable ln_health "Log health expenditures (last 3 months), excludes non-prescription drugs"

			generate food = foodpq + foodcq
			generate ln_food = ln(1+food)
			label variable food "Food expenditures (last 3 months)"
			label variable ln_food "Log food expenditures (last 3 months)"
		}
		else if "`dataset'"=="cps" {
			* Three-way employment status from pemlr; -1 becomes missing.
			recode pemlr (1/2=1) (3/4=2) (5/7=3) (-1=.), generate(employment)
			label define employ 1 "Employed" 2 "Unemployed" 3 "NILF"
			label values employment employ

			* 1 = employed, 0 = unemployed or NILF, missing otherwise.
			generate employed = (employment==1) if inlist(employment, 1, 2, 3)
			label variable employed "Employed"

			* 1 for unemployed or NILF; never set to 0.
			generate notemployed = 1 if inlist(employment, 2, 3)
			label variable notemployed "Either NILF or unemployed"

			* 1 = unemployed, 0 = NILF, missing for everyone else.
			generate unemployed = (employment==2) if inlist(employment, 2, 3)
			label variable unemployed "Unemployed, conditional on not being employed" 

			* 1 = unemployed, 0 = employed or NILF.
			generate unconditional_unemployed = (employment==2) if inlist(employment, 1, 2, 3)

			* 1 = unemployed, 0 = employed, missing for NILF.
			generate unemployment_rate = .
			replace unemployment_rate = 1 if unemployed==1
			replace unemployment_rate = 0 if employed==1

			* 1 when employed or unemployed, 0 in every other case.
			generate laborforce = inlist(employment, 1, 2)
		}
		
		*****Covariates
		***Education
		if "`dataset'"=="brfss" {
			* Five-level education; code 9 and missing values share level 5.
			recode educa (1/3=1) (4=2) (5=3) (6=4) (9=5) (miss=5), generate(education)
			label define education 1 "Less than high school"  2 "High school" 3 "Some college" 4 "College or more" 5 "Missing educ."
			label values education education
		}
		else if "`dataset'"=="cex" {
			* educ_ref is converted to numeric before it is binned.
			destring educ_ref, replace
			recode educ_ref (00 10 11 = 1) (12 =2) (13 14 =3) (15 16 17 =4) (miss=5), generate(education)
			label define educ 1 "Less than high school" 2 "High school" 3 "Some college" 4 "College or more" 5 "Missing educ."
			label values education educ
			label variable education "Education of reference person"
		}
		else if "`dataset'"=="cps" {
			* Five-level education from peeduca; -1 goes to level 5.
			recode peeduca (31/38=1) (39=2) (40/42=3) (43/46=4) (-1=5), generate(education)
			label define education 1 "Less than high school" 2 "High School" 3 "Some college or associate's degree" 4 "Bachelor's degree or more" 5 "Missing educ."
			label values education education
		}
				
		***Income
		if "`dataset'"=="brfss" {
			* Codes 77 and 99 and missing values all go to level 5.
			recode income2 (1/4=1) (5/6=2) (7=3) (8=4) (77 99 =5) (miss=5), generate(income)
			label define income 1 "Inc. below $25,000" 2 "Inc. $25,000-49,999" 3 "Inc. $50,000-74,999" 4 "Inc. $75,000 and up" 5 "Inc. missing"
			label values income income
		}
		else if "`dataset'"=="cex" {
			* inclass is converted to numeric, then collapsed to four levels.
			destring inclass, replace
			recode inclass (1 2 3 4 = 1) (5 6 =2) (7 8 =3) (9=4), generate(income)
			label define income 1 "Inc. below $20,000" 2 "Inc. $20,000-$39,999" 3 "Inc. $40,000-69,999" 4 "Inc. 70,000 and up"
			label values income income
			label variable income "Consumer unit income (pre-tax)"
		}
		else if "`dataset'"=="cps" {
			* hefaminc collapsed to four levels.
			recode hefaminc  (1/6=1) (7/10=2) (11/13=3) (14/16=4), generate(income)
			label define income 1 "Inc. below $20,000" 2 "Inc. $20,000-$39,999" 3 "Inc. $40,000-74,999" 4 "Inc. 75,000 and up"
			label values income income
		}
		
		***Age
		if "`dataset'"=="brfss" {
			* Keep the raw variable under a new name so that "age" can hold the bins.
			rename age age_original

			* Coarse bins: codes 7 and 9 and missing values go to 6, every
			* remaining non-missing value to 5.
			recode age_original (18/39=1) (40/49=2) (50/59=3) (60/69=4) (7 9=6) (miss=6) (nonmiss=5), generate(age)
			label define age 1 "Age 18-39" 2 "Age 40-49" 3 "Age 50-59" 4 "Age 60-69" 5 "Age 70 and up" 6 "Age missing"
			label values age age
			label variable age "Respondent age"

			* Finer bins with the same treatment of 7, 9 and missing (here code 11).
			recode age_original (18/24=1) (25/29=2) (30/34=3) (35/39=4) (40/44=5) (45/49=6) (50/54=7) (55/59=8) (60/64=9) (7 9=11) (nonmiss=10) (miss=11) , generate(age2)
			label define age2 1 "Age 18-24" 2 "Age 25-29" 3 "Age 30-34" 4 "Age 35-39" 5 "Age 40-44" 6 "Age 45-49" 7 "Age 50-54" 8 "Age 55-59" 9 "Age 60-64" 10 "Age 65 and up" 11 "Age missing"
			label values age2 age2
			label variable age2 "Respondent age, more categories"
		}
		
		else if "`dataset'"=="cex" {
			destring age_ref, replace

			* Coarse bins of the reference person's age.
			recode age_ref (16/29=1) (30/39=2) (40/49=3) (50/64=4) (65/99=5) , generate(age)
			label define age 1 "Age 16-29" 2 "Age 30-39" 3 "Age 40-49" 4 "Age 50-64"  5 "Age 65 and up" 
			label values age age
			label variable age "Reference person age"

			* An age2 variable already exists at this point; it is removed so
			* the name can hold the finer bins.
			drop age2
			recode age_ref (16/24=1) (25/29=2) (30/34=3) (35/39=4) (40/44=5) (45/49=6) (50/54=7) (55/59=8) (60/64=9) (nonmiss=10) (miss=11) , generate(age2)
			label define age2 1 "Age 16-24" 2 "Age 25-29" 3 "Age 30-34" 4 "Age 35-39" 5 "Age 40-44" 6 "Age 45-49" 7 "Age 50-54" 8 "Age 55-59" 9 "Age 60-64" 10 "Age 65 and up" 11 "Age missing"
			label values age2 age2
			label variable age2 "Respondent age, more categories"
		}
		
		else if "`dataset'"=="cps" {
			* Coarse bins from prtage. Where that gives "missing" (6), fall back
			* to the bin computed from peage.
			recode prtage (-1=6) (16/19=1) (20/39=2) (40/49=3) (50/64=4) (65/99=5)  (0/15=7)  (miss=6), g(age) 
			recode peage (-1=6) (16/19=1) (20/39=2) (40/49=3) (50/64=4) (65/99=5)  (0/15=7)  (miss=6), g(age2)
			replace age=age2 if age==6 & age2!=.
			drop age2

			label variable age "Individual's age"
			label define age 1 "Age 16-19" 2 "Age 20-39" 3 "Age 40-49" 4 "Age 50-64" 5 "Age 65 and up" 6 "Age missing " 7 "Age 0 to 15"
			label values age age

			* Finer bins, same fallback logic with 11 as the "missing" code.
			recode prtage (16/24=1) (25/29=2) (30/34=3) (35/39=4) (40/44=5) (45/49=6) (50/54=7) (55/59=8) (60/64=9) (0/15=12) (-1=11) (nonmiss=10)   (miss=11) , g(age2)
			recode peage (16/24=1) (25/29=2) (30/34=3) (35/39=4) (40/44=5) (45/49=6) (50/54=7) (55/59=8) (60/64=9) (0/15=12) (-1=11) (nonmiss=10)   (miss=11) , g(age3)
			* The guard now tests the fallback variable (age3), matching the
			* coarse-bin step above. The original tested age2!=., which is
			* always true once age2==11 holds.
			replace age2=age3 if age2==11 & age3!=.
			drop age3

			label variable age2 "Respondent age, more categories"
			* Code 12 is produced by the (0/15=12) rule, so it gets a label,
			* using the same wording as code 7 of the coarse age label.
			label define age2 1 "Age 16-24" 2 "Age 25-29" 3 "Age 30-34" 4 "Age 35-39"  5 "Age 40-44"   6 "Age 45-49"  7 "Age 50-54"   8 "Age 55-59"  9 "Age 60-64"      10 "Age 65 and up" 11 "Age missing" 12 "Age 0 to 15"
			label values age2 age2
		}
		
		***Sex
		if "`dataset'"=="brfss" {
			* female: 0 when sex is 1, 1 when sex is 2.
			recode sex (1=0) (2=1), generate(female)
			label define female 0 "Male" 1 "Female"
			label values female female
			label variable female "Respondent's sex"
		}
		else if "`dataset'"=="cex" {
			* Same coding, based on the numeric version of sex_ref.
			destring sex_ref, replace
			recode sex_ref (1=0) (2=1), generate(female)
			label define female 0 "Male" 1 "Female"
			label values female female
			label variable female "Reference person's sex"
		}
		else if "`dataset'"=="cps" {
			* Same coding from pesex, with -1 mapped to a third category.
			recode pesex (1=0) (2=1) (-1=2), generate(female) 
			label define female 0 "Male" 1 "Female" 2 "Missing sex"
			label values female female
			label variable female "Individual's sex"
		}

		***Children
		if "`dataset'"=="brfss" {
			* The raw variable is renamed so "children" can hold the indicator:
			* 88 -> 0, 1-87 -> 1, 99 or missing -> 2.
			rename children children_original
			recode children_original (88=0) (1/87=1) (99=2) (miss=2), generate(children)
			label define children 0 "No children under 18 in HH" 1 "1 or more children under 18 in HH" 2 "Missing child data"
			label values children children
		}
		else if "`dataset'"=="cex" {
			* 0 stays 0; any other non-missing count of perslt18 becomes 1.
			destring perslt18, replace
			recode perslt18 (0=0) (nonmiss=1), generate(children)
			label define children 0 "No children under 18 in CU" 1 "1 or more children under 18 in CU"
			label values children children
		}
		else if "`dataset'"=="cps" {
			* -1 and 0 both count as no children; 1-12 as one or more.
			recode prnmchld (-1=0) (0=0) (1/12=1), generate(children)
			label define children  0 "No children under 18 in HH" 1 "1 or more children under 18 in HH"
			label values children children
		}
		
		***Race
		if "`dataset'"=="brfss" {
			* Codes 3 and 4 are pooled; 9 and missing values go to 6.
			recode _racegr2 (1=1) (2=2) (3/4=3) (5=5) (9=6) (miss=6), generate(race)
			label define race 1 "White, non-hispanic" 2 "Black, non-hispanic" 3 "Other race, non-hispanic"  5 "Hispanic" 6 "missing"
			label values race race
		}
		else if "`dataset'"=="cex" {
			destring ref_race, replace
			*Combined Native American, Pacific Islander, and Multi-race due to small cell size
			recode ref_race (1=1) (2=2) (4=3) (nonmiss =4) (miss=5), generate(race) 
			label define race 1 "White" 2 "Black" 3 "Asian" 4 "Other" 5 "Missing"
			label values race race
			label variable race "Race of reference person"
		}
		else if "`dataset'"=="cps" {
			* Codes 3, 5 and 6-26 are pooled into 6; -1 goes to 7.
			recode ptdtrace (1=1) (2=2) (3=6) (4=4) (5=6) (6/26=6) (-1=7), generate(race)
			label define race 1 "White" 2 "Black" 3 "American Indian" 4 "asian" 5 "Hawaiian/Pacific Islander" 6 "Other" 7 "Missing race"
			label values race race
		}
		*/

		******Other variables (dataset specific)
			if "`dataset'"=="brfss" {
				* 1 when qstver lies in 20-23, 0 otherwise.
				g cell=(qstver>=20 & qstver<=23) 
					label define cell 0 "Interview conducted on landline" 1 "Interview conducted on cell"
					label values cell cell
				
				* Copies with code 9 and/or missing values folded into 99.
				recode marital (9=99) (miss=99), g(marriage)

				g statenum=_state

				recode mscode (miss=99), g(urban_rural)
				
				recode fmonth (miss=99), g(interview_month)
				label variable interview_month "Interview month"
				
				* Interviewer id, numbered within state.
				encode  intvid, g(fi_id2)
				egen fi_id=group(statenum fi_id2)
				label variable fi_id "Interivewer ID"
				
				* Number of distinct difficulty levels per interviewer. The full
				* variable name is used (the original relied on the abbreviation
				* "diff"), so this does not break if another variable starting
				* with "diff" is present.
				sort statenum fi_id difficulty
				by statenum fi_id: g new_diff=1 if difficulty[_n]!=difficulty[_n-1]
				byso statenum fi_id: egen total_diff=total(new_diff)
				
				* Smallest cell size per interviewer across difficulty levels,
				* forced to 0 when fewer than 4 levels are present.
				byso statenum fi_id difficulty: gen count=_N
				byso statenum fi_id: egen count2=min(count)
				replace count2=0 if total_diff<4
				g fesample=(count2>=10 & count2!=.)
				label variable fesample "FI has at least 10 completes in every difficulty category"
				drop count count2
				
				recode employ (9=9) (miss=9), g(lforce)
				label variable lforce "Labor force status of respondent"
				label define lforce 1 "Employed for wages" 2 "Self-employed" 3 "Out of work for more than 1 year" 4 "Out of work for less than 1 year" 5 "A homemaker" 6 "A student" 7 "Retired" 8 "Unable to work" 9 "Missing" 
				* The label set was defined but never attached in the original.
				label values lforce lforce
				}
			
			else if "`dataset'"=="cex" {
			
				*for unmatched hour_group data
				replace hour_group=5 if hour_group==.

				* Consumer unit size: 1, 2, or 3 for any larger non-missing size.
				destring fam_size, replace
				recode fam_size (1=1) (2=2) (nonmiss=3), g(size)
					label variable size "Size of consumer unit"
					label define size 1 "1 member" 2 "2 members" 3 "3 or more members"
					label values size size
				
				destring marital1, replace				
				recode marital1 (miss=999), g(marriage)
					label variable marriage "Marital status of reference person"
					label define marriage 1 "Married" 2 "Widowed" 3 "Divorced" 4 "Separated" 5 "Never married" 999 "Missing"
					label values marriage marriage
				
				destring state, replace
				recode state (miss=999), g(statenum)
				replace statenum=999 if statenum==54 //replacing state with too few observations to estimate margins (missing cells)
				label variable statenum "State"
				
				destring smsastat, replace
				recode smsastat (miss=999), g(urban_rural)
				* The variable is named in full here. The original wrote "urban",
				* which only worked as an abbreviation of urban_rural. The value
				* label itself keeps the name urban.
				label variable urban_rural "Urban/rural"
				label define urban 1 "Inside MSA" 2 "Outside MSA" 999 "Missing"
				label values urban_rural urban
				
				destring qintrvmo, replace
				recode qintrvmo (miss=999), g(interview_month)
				label variable interview_month "Interview month"
				
				destring qintrvyr, replace
				recode qintrvyr (miss=999), g(interview_year)
				label variable interview_year "Interview year"
				
				
			}
			else if "`dataset'"=="cps" {
				* self is 1 when puslfprx is 1 and 0 when it is 2 or 3. The
				* value label is keyed to those 0/1 codes and attached; the
				* original defined it on codes 1 and 2 and never attached it.
				recode puslfprx (1=1) (2=0) (3=0), g(self)
					label variable self "Was labor force participation completely filled out by self"
					label define self 1 "Completed by self" 0 "Completed by proxy or mix of self and proxy"
					label values self self
			
				* nonself is 1 when puslfprx is 2 and 0 when it is 1 or 3;
				* same label fix as for self.
				recode puslfprx (2=1) (1=0) (3=0), g(nonself)
					label variable nonself "Was labor force participation completely filled out by proxy"
					label define nonself 1 "Completed by prox" 0 "Completed by self or mix of self and proxy"
					label values nonself nonself
			
				* -1 and missing values are folded into 999. The label set now
				* covers 999 and is attached to the variable.
				recode pemaritl (-1=999) (miss=999) , g(marriage)
					label variable marriage "Marital status"
					label define marriage 1 "Married-spouse present" 2 "Married - spouse absent" 3 "Widowed" 4 "Divorced" 5 "Seperated" 6 "Never married" 999 "Missing"
					label values marriage marriage
			
				recode gestfips (-1=999) (miss=999) , g(statenum)
					label variable statenum "State"
			
				recode gtmetsta (3=3) (-1=3) (miss=3), g(urban_rural)
					label variable urban_rural "Metropolotan status"
					label define urban_rural 1 "Metropolitan" 2 "Nonmetropolotin" 3 "Missing"
					label values urban_rural urban_rural
				
				recode hrmonth (-1=99) (miss=99) , g(interview_month)
					label variable interview_month "Interview month"
			
				recode hryear4  (-1=99) (miss=99)  , g(interview_year)
					label variable interview_year "Interview year"
				
				* Household size: 1-4 kept, any larger non-missing value is 5,
				* missing is 6. The label for code 5 was missing the word "more".
				recode hrnumhou (1=1) (2=2) (3=3) (4=4) (nonmiss=5) (miss=6), g(size)
					label variable size "Size of HH"
					label define size 1 "1 member" 2 "2 members" 3 "3 members"  4 "4 members"  5 "5 or more members" 6 "Missing size"
					label values size size				
			}
	*/	
		
	*****Create cross tabs if  global tables set to "on"
	if "${tables}"=="on" {
		* Everything in this block is written to a text log next to the data.
		log using "${mypath}/data/`dataset'_crosstabs", text replace

		if "`dataset'"=="brfss" {
			* Detailed summaries of the continuous outcomes.
			summarize sr_height, detail
			summarize sr_weight, detail
			* Each cleaned variable tabulated against its raw source.
			bigtab education educa
			bigtab income income2
			bigtab age age_original
			bigtab female sex
			bigtab children children_original
			bigtab race _racegr2
			bigtab cell  qstver
			bigtab marriage marital
			bigtab statenum _state
			bigtab urban_rural mscode
		}
		else if "`dataset'"=="cex" {
			* Detailed summaries of the expenditure outcomes and their components.
			summarize total totexppq totexpcq ln_total health healthpq healthcq ln_health food foodpq foodcq ln_food, detail
			* Each cleaned variable tabulated against its raw source.
			bigtab education educ_ref
			bigtab income inclass
			bigtab age age_ref
			bigtab female sex_ref
			bigtab children perslt18
			bigtab race ref_race
			bigtab size fam_size
		}
		else if "`dataset'"=="cps" {
			* Each cleaned variable tabulated against its raw source.
			bigtab employment pemlr 
			bigtab employed pemlr
			bigtab unemployed pemlr
			bigtab education peeduca
			bigtab age prtage
			bigtab female pesex
			bigtab children prnmchld
			bigtab race ptdtrace
		}

		log close
	}
		
		
	****Limit dataset to primary analysis sample and variables

	if "`dataset'"=="brfss" {
		* Drop rows with a missing obesity indicator and rows with pregnant equal to 1.
		drop if sr_obese==. | pregnant==1

		* Analysis variables to carry forward.
		keep child* age* lsatis* female inc* marriage education race state* ///
			urban_rural sr_* cell natt difficulty attempts_cleaned *bmi* ///
			_llcpwt brfss_obese interview_month fi_id lforce fesample
	}
	else if "`dataset'"=="cex" {
		* Keep only rows whose _merge equals 3.
		keep if _merge==3
	}
	else if "`dataset'"=="cps" {
		* Age code 7 is the 0-15 group.
		drop if age==7
		* Only rows with hrmis equal to 1.
		keep if hrmis==1
		* Only rows with a non-missing employment status; this keeps
		* employed, unemployed and NILF alike.
		keep if employment!=.
	}
	*/
	
	****Save final analysis dataset
	* The path is quoted, like every other save in this script, so that it
	* is passed to save as a single filename token.
	save "${mypath}/data/`dataset'_analysis", replace
	*/
		
	
}


















